##### When Co-Ethnicity Fails: Replication Code

#### Load Packages

library(stargazer)
library(ggplot2)
library(ggpubr)
library(lmtest)
library(multiwayvcov)

#### Load Data

data <- read.csv("When Co-ethnicity Fails_World Politics.csv") #all X.sd variables are standardized from 0 to 1

#### Figure 3 
### Scatter plots showing association in Figure 2(c) ###

PGQplot <- ggscatter(data, x = "afrodes.prop.2000", y = "pg.qual.2000.sd", ylim = c(0, 1), xlim = c(0, 1), size = 0.9,
                     xlab = "Afro-descendants, share", ylab = "Public Goods Quality", color = "grey60",
          add = "reg.line", add.params = list(color = "black"), conf.int = TRUE, 
) + stat_cor(method = "pearson", label.x = 0.35, label.y = 0.005)

# Breakdown of Public Goods

EDplot <- ggscatter(data, x = "afrodes.prop.2000", y = "educ_quality.2000", size = 0.9, #ylim = c(0, 1), xlim = c(0, 1),  
                     xlab = "Afro-descendants, share", ylab = "Education Quality", color = "grey60",
                     add = "reg.line", add.params = list(color = "black"), conf.int = TRUE, 
) + stat_cor(method = "pearson", label.x = 0.35, label.y = 0.95)

HEALTHplot <- ggscatter(data, x = "afrodes.prop.2000", y = "health_qual.2000", size = 0.9, #ylim = c(0, 1), xlim = c(0, 1), 
                    xlab = "", ylab = "Healthcare Quality", color = "grey60",
                    add = "reg.line", add.params = list(color = "black"), conf.int = TRUE, 
) + stat_cor(method = "pearson", label.x = 0.35, label.y = 0.95)

BASICplot <- ggscatter(data, x = "afrodes.prop.2000", y = "basic.goods.2000", size = 0.9, #ylim = c(0, 1), xlim = c(0, 1), 
                       xlab = "", ylab = "Access to Basic Public Goods", color = "grey60",
                       add = "reg.line", add.params = list(color = "black"), conf.int = TRUE, 
) + stat_cor(method = "pearson", label.x = 0.35, label.y = 0.95)

ggarrange(PGQplot, EDplot, HEALTHplot, BASICplot,
          labels = c("", "", "", ""),
          ncol = 2, nrow = 2)
#ggsave("scatter_plots.png")

######################################################

# Tables

### Descriptive statistics (Table A1)

stargazer(data[c("afrodes.prop.2000", "afrodes.prop.1980", "afrodes.prop.1940",
                 "ln.pc.receita2000", "educ_quality.2000", "health_qual.2000", "pc.pop.electricity.2000", "pc.pop.ENwater.2000", "pc.pop.garbage.2000",
                 "ln.pc.receita1920", "quil.log.tot", "ln.rail1920",  "ln.remoteness",
                 "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude")],
          covariate.labels=c("Afro-descendants \\% (2000)", "Afro-descendants \\% (1980)", "Afro-descendants \\% (1960)",
                             "Tax Revenues PC (logged, 2000)", "Education Quality (2000)", "Healthcare Quality (2000)", "Access to Electricity (2000)", "Access to Piped Water (2000)", "Access to Garbage Collection (2000)", 
                             "Tax Revenues PC (logged, 1920)", "Number of Quilombos (logged)", "Railroad Number (logged)", "Geographic Remoteness (logged)",
                             "Area Size (logged)", "Altitude", "Rainfall", "Sunshine", "Distance to Coast (logged)", "Distance to Capital (logged)",
                             "Longitude", "Latitude"), 
          nobs = FALSE, digits=2,
          title="Full Sample (n = 5505)", iqr = TRUE)

### Table 1: Past Capacity --> current % Afrodes & current PG

## Past Capacity --> current % Afrodes

m0t <- lm(afrodes.prop.2000 ~ ln.pc.receita1920.sd, data)
m1t <- lm(afrodes.prop.2000 ~ ln.pc.receita1920.sd +
            state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
m1ta <- lm(afrodes.prop.2000 ~ ln.pc.receita1920.sd +
                               ln.pc.pib1920.sd + 
                               state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

## Past Capacity --> current PG

m0a <- lm(pg.2000.sd ~ ln.pc.receita1920.sd, data)
m1a <- lm(pg.2000.sd ~ ln.pc.receita1920.sd +
            state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
m1aa <- lm(pg.2000.sd ~ ln.pc.receita1920.sd + 
             ln.pc.pib1920.sd +
             state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

# cluster-robust standard errors

cl.se.m0t <- sqrt(diag(cluster.vcov(m0t, data$amc1920)))
cl.se.m1t <- sqrt(diag(cluster.vcov(m1t, data$amc1920)))
cl.se.m1ta <- sqrt(diag(cluster.vcov(m1ta, data$amc1920)))

cl.se.m0a <- sqrt(diag(cluster.vcov(m0a, data$amc1920)))
cl.se.m1a <- sqrt(diag(cluster.vcov(m1a, data$amc1920)))
cl.se.m1aa <- sqrt(diag(cluster.vcov(m1aa, data$amc1920)))

stargazer(m0a, m1a, m0t, m1t, dep.var.caption = "",
          se=list(cl.se.m0a, cl.se.m1a, cl.se.m0t, cl.se.m1t), 
          title="Present Public Goods Outcomes and Racial Demography as a Function of Past State Capacity", 
          dep.var.labels=c("Public Goods, ", "Afro-descendants \\%, "),
          covariate.labels=c("Fiscal Capacity, 1920"),
          omit = c("state", "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude", "Constant"),
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F, notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "No", "Yes", "No", "Yes"),
                           c("Geographic controls", "No", "Yes", "No", "Yes")))

### Table 2: Past state capacity <--> Quilombos

m0 <- lm(ln.pc.receita1920.sd ~ quil.sd, data)
m1 <- lm(ln.pc.receita1920.sd ~ quil.sd +
           state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

# cluster-robust standard errors

cl.se.m0 <- sqrt(diag(cluster.vcov(m0, data$amc1920)))
cl.se.m1 <- sqrt(diag(cluster.vcov(m1, data$amc1920)))

stargazer(m0, m1, dep.var.caption = "",
          se=list(cl.se.m0, cl.se.m1), 
          title="Past State Capacity and Quilombos", 
          dep.var.labels=c("Fiscal Capacity, "),
          covariate.labels=c("Quilombo Presence"),
          omit = c("state", "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude", "Constant"),
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F, notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "No", "Yes"),
                           c("Geographic controls", "No", "Yes")))

### Table 3: Quilombos --> Current % Afrodes

m2 <- lm(afrodes.prop.2000 ~ quil.sd, data)
m3 <- lm(afrodes.prop.2000 ~ quil.sd +
           state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

# cluster-robust standard errors

cl.se.m2 <- sqrt(diag(cluster.vcov(m2, data$amc1920)))
cl.se.m3 <- sqrt(diag(cluster.vcov(m3, data$amc1920)))

stargazer(m2, m3, dep.var.caption = "", 
          se=list(cl.se.m2, cl.se.m3), 
          title="Quilombos and Contemporary Share of Afro-descendants", 
          dep.var.labels=c("Afro-descendants \\%, "),
          covariate.labels=c("Quilombo Presence"),
          align=TRUE, no.space=TRUE, column.sep.width = "20pt", 
          omit = c("state", "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude", "Constant"),
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F, notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "No", "Yes"),
                           c("Geographic controls", "No", "Yes")))

###########################
###  Robustness checks  ###
###########################

#### Table A2: Afrodes and breakdown of public goods

m0ed <- lm(educ.qual.2000.sd ~ afrodes.prop.2000, data)
m1ed <- lm(educ.qual.2000.sd ~ afrodes.prop.2000 + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
cl.se.m0ed <- sqrt(diag(cluster.vcov(m0ed, data$amc1920)))
cl.se.m1ed <- sqrt(diag(cluster.vcov(m1ed, data$amc1920)))

m0h <- lm(heal.qual.2000.sd ~ afrodes.prop.2000, data)
m1h <- lm(heal.qual.2000.sd ~ afrodes.prop.2000 + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
cl.se.m0h <- sqrt(diag(cluster.vcov(m0h, data$amc1920)))
cl.se.m1h <- sqrt(diag(cluster.vcov(m1h, data$amc1920)))

m0b <- lm(basic.goods.2000.sd ~ afrodes.prop.2000, data)
m1b <- lm(basic.goods.2000.sd ~ afrodes.prop.2000 + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
cl.se.m0b <- sqrt(diag(cluster.vcov(m0h, data$amc1920)))
cl.se.m1b <- sqrt(diag(cluster.vcov(m1h, data$amc1920)))

stargazer(m0ed, m1ed, m0h, m1h, m0b, m1b, dep.var.caption = "",
          se=list(cl.se.m0ed, cl.se.m1ed, cl.se.m0h, cl.se.m1h, cl.se.m0b, cl.se.m1b), 
          title="Present Public Goods Outcomes and Afro-descendant Shares of Population", 
          dep.var.labels=c("Education, ", "Health, ", "Access to Basic Goods, "),
          covariate.labels=c("Afro-descendants \\%, 2000"),
          no.space=TRUE, column.sep.width = "5pt", 
          omit = c("state", "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude", "Constant"),
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F,  notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "No", "Yes", "No", "Yes", "No", "Yes"),
                           c("Geographic controls", "No", "Yes", "No", "Yes", "No", "Yes")))

### Table A4: State capacity and breakdown of public goods

m0ed <- lm(educ.qual.2000.sd ~ ln.pc.receita1920.sd, data)
m1ed <- lm(educ.qual.2000.sd ~ ln.pc.receita1920.sd + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

cl.se.m0ed <- sqrt(diag(cluster.vcov(m0ed, data$amc1920)))
cl.se.m1ed <- sqrt(diag(cluster.vcov(m1ed, data$amc1920)))

m0h <- lm(heal.qual.2000.sd ~ ln.pc.receita1920.sd, data)
m1h <- lm(heal.qual.2000.sd ~ ln.pc.receita1920.sd + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

cl.se.m0h <- sqrt(diag(cluster.vcov(m0h, data$amc1920)))
cl.se.m1h <- sqrt(diag(cluster.vcov(m1h, data$amc1920)))

m0b <- lm(basic.goods.2000.sd ~ ln.pc.receita1920.sd, data)
m1b <- lm(basic.goods.2000.sd ~ ln.pc.receita1920.sd + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

cl.se.m0b <- sqrt(diag(cluster.vcov(m0b, data$amc1920)))
cl.se.m1b <- sqrt(diag(cluster.vcov(m1b, data$amc1920)))

stargazer(m0ed, m1ed, m0h, m1h, m0b, m1b, dep.var.caption = "",
          se=list(cl.se.m0ed, cl.se.m1ed, cl.se.m0h, cl.se.m1h, cl.se.m0b, cl.se.m1b), 
          title="Public Outcomes and Past State Capacity", 
          dep.var.labels=c("Education, ", "Health, ", "Access to Basic Goods, "),
          covariate.labels=c("Fiscal Capacity, 1920"),
          omit = c("state", "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude", "Constant"),
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F,  notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "No", "Yes", "No", "Yes", "No", "Yes"),
                           c("Geographic controls", "No", "Yes", "No", "Yes", "No", "Yes")))

### Table A5: Past state capacity <--> Present state capacity

m0c <- lm(ln.pc.receita2000.sd ~ ln.pc.receita1920.sd, data)
m1c <- lm(ln.pc.receita2000.sd ~ ln.pc.receita1920.sd +
            state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
m1ca <- lm(ln.pc.receita2000.sd ~ ln.pc.receita1920.sd +
             ln.pc.pib1920.sd +
             state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

m0d <- lm(cadaster ~ ln.pc.receita1920.sd, data)
m1d <- lm(cadaster ~ ln.pc.receita1920.sd +
            state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
m1da <- lm(cadaster ~ ln.pc.receita1920.sd +
             ln.pc.pib1920.sd +
             state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

cl.se.m0c <- sqrt(diag(cluster.vcov(m0c, data$amc1920)))
cl.se.m1c <- sqrt(diag(cluster.vcov(m1c, data$amc1920)))
cl.se.m1ca <- sqrt(diag(cluster.vcov(m1ca, data$amc1920)))

cl.se.m0d <- sqrt(diag(cluster.vcov(m0d, data$amc1920)))
cl.se.m1d <- sqrt(diag(cluster.vcov(m1d, data$amc1920)))
cl.se.m1da <- sqrt(diag(cluster.vcov(m1da, data$amc1920)))

stargazer(m0c, m1c, m0d, m1d, dep.var.caption = "", 
          se=list(cl.se.m0c, cl.se.m1c),
          label = "tab:pastcapacitypresentcapacity",
          title="Past and Present State Capacity", 
          dep.var.labels=c("Fiscal Capacity, ", "Cadaster Registartion, "),
          covariate.labels=c("Fiscal Capacity, 1920"),
          align=TRUE, no.space=TRUE, column.sep.width = "20pt", 
          omit = c("state", "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude", "Constant"),
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F, notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "No", "Yes", "No", "Yes"),
                           c("Geographic controls", "No", "Yes", "No", "Yes")))

### Table A9: Past Capacity --> current % Afrodes & current PG without quilombo municipalities

m0t <- lm(afrodes.prop.2000 ~ ln.pc.receita1920.sd, data[data$quil.sd == 0,])
m1t <- lm(afrodes.prop.2000 ~ ln.pc.receita1920.sd +
                              state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data[data$quil.sd == 0,])

m0a <- lm(pg.2000.sd ~ ln.pc.receita1920.sd, data[data$quil.sd == 0,])
m1a <- lm(pg.2000.sd ~ ln.pc.receita1920.sd + 
                       state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data[data$quil.sd == 0,])

cl.se.m0t <- sqrt(diag(cluster.vcov(m0t, data[data$quil.sd == 0,]$amc1920)))
cl.se.m1t <- sqrt(diag(cluster.vcov(m1t, data[data$quil.sd == 0,]$amc1920)))

cl.se.m0a <- sqrt(diag(cluster.vcov(m0a, data[data$quil.sd == 0,]$amc1920)))
cl.se.m1a <- sqrt(diag(cluster.vcov(m1a, data[data$quil.sd == 0,]$amc1920)))

stargazer(m0a, m1a, m0t, m1t, dep.var.caption = "",
          se=list(cl.se.m0a, cl.se.m1a, cl.se.m0t, cl.se.m1t), 
          title="Public Goods Outcomes and Racial Demography as a Function of Past State Capacity (excluding Quilombo Municipalities)", 
          dep.var.labels=c("Public Goods, ", "Afro-descendants \\%, "),
          covariate.labels=c("Fiscal Capacity (1920)"),
          omit = c("state", "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude", "Constant"),
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F,  notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "No", "Yes", "No", "Yes"),
                           c("Geographic controls", "No", "Yes", "No", "Yes")))

# TABLE A12: With 1920 income as control

stargazer(m0c, m1c, m1ca, m0a, m1a, m1aa, m0t, m1t, m1ta, dep.var.caption = "",
          se=list(cl.se.m0a, cl.se.m1a, cl.se.m1aa, cl.se.m0t, cl.se.m1t, cl.se.m1ta), 
          title="Present Public Outcomes and Racial Demography as a Function of Past State Capacity (Accounting for Past Municipal Income Per Capita)", 
          dep.var.labels=c("Fiscal Capacity, 2000", "Public Goods, 2000", "Afro-descendants \\%, 2000"),
          covariate.labels=c("Fiscal Capacity, 1920"),
          align=TRUE, no.space=TRUE, column.sep.width = "-5pt", 
          omit = c("state", "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude", "Constant", "ln.pc.pib1920.sd"),
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F,  notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "No", "Yes", "Yes", "No", "Yes", "Yes", "No", "Yes", "Yes"),
                           c("Geographic controls","No", "Yes", "Yes", "No", "Yes", "Yes", "No", "Yes", "Yes"),
                           c("GDP PC, 1920 (log)", "No", "No", "Yes", "No", "No", "Yes", "No", "No", "Yes")))

###### Appendix (C-D)

### Total Afrodes prop

a1 <- lm(ln.pc.receita1920.sd ~ afrodes.prop.1872 +
           state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
a2 <- lm(afrodes.prop.2000 ~ afrodes.prop.1872
           + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
a3 <- lm(quil.sd ~ afrodes.prop.1872 
           + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
a4 <- lm(pg.2000.sd ~ afrodes.prop.1872
           + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
cl.se.a1 <- sqrt(diag(cluster.vcov(a1, data$amc1872)))
cl.se.a2 <- sqrt(diag(cluster.vcov(a2, data$amc1920)))
cl.se.a3 <- sqrt(diag(cluster.vcov(a3, data$amc1920)))
cl.se.a4 <- sqrt(diag(cluster.vcov(a4, data$amc1920)))

### Free Afrodes prop

b1 <- lm(ln.pc.receita1920.sd ~ free.afrodes.prop.1872 +
           state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
b2 <- lm(afrodes.prop.2000 ~ free.afrodes.prop.1872 
           + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
b3 <- lm(quil.sd ~ free.afrodes.prop.1872
           + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
b4 <- lm(pg.2000.sd ~ free.afrodes.prop.1872
           + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

cl.se.b1 <- sqrt(diag(cluster.vcov(b1, data$amc1872)))
cl.se.b2 <- sqrt(diag(cluster.vcov(b2, data$amc1920)))
cl.se.b3 <- sqrt(diag(cluster.vcov(b3, data$amc1872)))
cl.se.b4 <- sqrt(diag(cluster.vcov(b4, data$amc1920)))

### Slave.prop

c1 <- lm(ln.pc.receita1920.sd ~ slave.prop.1872 + 
           state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
c2 <- lm(afrodes.prop.2000 ~ slave.prop.1872
           + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
c3 <- lm(quil.sd ~ slave.prop.1872 +
           state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
c4 <- lm(pg.2000.sd ~ slave.prop.1872 
           + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

cl.se.c1 <- sqrt(diag(cluster.vcov(c1, data$amc1872)))
cl.se.c2 <- sqrt(diag(cluster.vcov(c2, data$amc1920)))
cl.se.c3 <- sqrt(diag(cluster.vcov(c3, data$amc1872)))
cl.se.c4 <- sqrt(diag(cluster.vcov(c4, data$amc1920)))

# Table A10: Racial Demography and Quilombos
stargazer(a3, b3, c3, dep.var.caption = "",  
          se=list(cl.se.a3, cl.se.b3, cl.se.c3), 
          title="Past Racial Demography and Quilombos", 
          dep.var.labels=c("Quilombo Presence"),
          covariate.labels=c("Total Afro-descendants \\%, 1872", "Free Afro-descendants \\%, 1872", "Enslaved Afro-descendants \\%, 1872"),
          no.space=TRUE, column.sep.width = "-10pt", 
          omit = c("state", "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude", "Constant"),
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F,  notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "Yes", "Yes", "Yes"),
                           c("Geographic controls", "Yes", "Yes", "Yes")))

# Table D1: 1872 Afrodes (free and enslaved) and Present Afrodes + Quilombos
stargazer(a2, b2, c2, a4, b4, c4, dep.var.caption = "",  
          se=list(cl.se.a2, cl.se.b2, cl.se.c2, cl.se.a4, cl.se.b4, cl.se.c4), 
          title="Past Racial Demography, Present Racial Demography and Public Goods", 
          dep.var.labels=c("Afro-descendants \\%, ", "Public Goods, "),
          covariate.labels=c("Total Afro-descendants \\%, 1872", "Free Afro-descendants \\%, 1872", "Enslaved Afro-descendants \\%, 1872"),
          omit = c("state", "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude", "Constant"),
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F,  notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"),
                           c("Geographic controls", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes")))

# Table D2: Past Racial Demography and State Capacity
stargazer(a1, b1, c1, dep.var.caption = "",  
          se=list(cl.se.a1, cl.se.b1, cl.se.c1), 
          title="Past Racial Demography and State Capacity", 
          dep.var.labels=c("Tax revenues per capita (1920)"),
          covariate.labels=c("Total Afro-descendants \\%, 1872", "Free Afro-descendants \\%, 1872", "Enslaved Afro-descendants \\%, 1872"),
          no.space=TRUE, column.sep.width = "-10pt", 
          omit = c("state", "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude", "Constant"),
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F,  notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "Yes", "Yes", "Yes"),
                           c("Geographic controls", "Yes", "Yes", "Yes")))


##############
# PERSISTENCE
##############

#### Past Capacity --> 1940

m0a <- lm(afrodes.prop.1940 ~ ln.pc.receita1920.sd, data)
m0b <- lm(afrodes.prop.1940 ~ ln.pc.receita1920.sd + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
m1a <- lm(afrodes.prop.1980 ~ ln.pc.receita1920.sd, data)
m1b <- lm(afrodes.prop.1980 ~ ln.pc.receita1920.sd + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
m2a <- lm(afrodes.prop.2000 ~ ln.pc.receita1920.sd, data)
m2b <- lm(afrodes.prop.2000 ~ ln.pc.receita1920.sd + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

m3a <- lm(afrodes.prop.19401980 ~ ln.pc.receita1920.sd, data)
m3b <- lm(afrodes.prop.19401980 ~ ln.pc.receita1920.sd + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
m3c <- lm(afrodes.prop.19401980 ~ ln.pc.receita1920.sd + afrodes.prop.1940 + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

m4a <- lm(afrodes.prop.19401980.rel ~ ln.pc.receita1920.sd, data)
m4b <- lm(afrodes.prop.19401980.rel ~ ln.pc.receita1920.sd + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
m4c <- lm(afrodes.prop.19401980.rel ~ ln.pc.receita1920.sd + afrodes.prop.1940 + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

m5a <- lm(afrodes.prop.19402000 ~ ln.pc.receita1920.sd, data)
m5b <- lm(afrodes.prop.19402000 ~ ln.pc.receita1920.sd + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
m5c <- lm(afrodes.prop.19402000 ~ ln.pc.receita1920.sd + afrodes.prop.1940 + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

m6a <- lm(afrodes.prop.19402000.rel ~ ln.pc.receita1920.sd, data)
m6b <- lm(afrodes.prop.19402000.rel ~ ln.pc.receita1920.sd + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
m6c <- lm(afrodes.prop.19402000.rel ~ ln.pc.receita1920.sd + afrodes.prop.1940 + state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

cl.se.m0a <- sqrt(diag(cluster.vcov(m0a, data$amc1920)))
cl.se.m0b <- sqrt(diag(cluster.vcov(m0b, data$amc1920)))
cl.se.m1a <- sqrt(diag(cluster.vcov(m1a, data$amc1920)))
cl.se.m1b <- sqrt(diag(cluster.vcov(m1b, data$amc1920)))
cl.se.m2a <- sqrt(diag(cluster.vcov(m2a, data$amc1920)))
cl.se.m2b <- sqrt(diag(cluster.vcov(m2b, data$amc1920)))

cl.se.m3a <- sqrt(diag(cluster.vcov(m3a, data$amc1920)))
cl.se.m3b <- sqrt(diag(cluster.vcov(m3b, data$amc1920)))
cl.se.m3c <- sqrt(diag(cluster.vcov(m3c, data$amc1920)))

cl.se.m4a <- sqrt(diag(cluster.vcov(m4a, data$amc1920)))
cl.se.m4b <- sqrt(diag(cluster.vcov(m4b, data$amc1920)))
cl.se.m4c <- sqrt(diag(cluster.vcov(m4c, data$amc1920)))

cl.se.m5a <- sqrt(diag(cluster.vcov(m5a, data$amc1920)))
cl.se.m5b <- sqrt(diag(cluster.vcov(m5b, data$amc1920)))
cl.se.m5c <- sqrt(diag(cluster.vcov(m5c, data$amc1920)))

cl.se.m6a <- sqrt(diag(cluster.vcov(m6a, data$amc1920)))
cl.se.m6b <- sqrt(diag(cluster.vcov(m6b, data$amc1920)))
cl.se.m6c <- sqrt(diag(cluster.vcov(m6c, data$amc1920)))

### Table 4: 1920 taxes --> 1940-2000 % Afrodes
stargazer(m0a, m0b, m1a, m1b, m2a, m2b, dep.var.caption = "",
          se=list(cl.se.m0a, cl.se.m0b, cl.se.m1a, cl.se.m1b, cl.se.m2a, cl.se.m2b), 
          title="Racial Demography as a Function of Past State Capacity (1940-2000)", 
          dep.var.labels=c("Afro-des. \\%, 1940", "Afro-des. \\%, 1980", "Afro-des. \\%, 2000"),
          covariate.labels=c("Fiscal Capacity, 1920"),
          align=TRUE, no.space=TRUE, column.sep.width = "-10pt", 
          omit = c("state", "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude", "Constant"),
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F,  notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "No", "Yes", "No", "Yes", "No", "Yes"),
                           c("Geographic controls", "No", "Yes", "No", "Yes", "No", "Yes")))

### Table 5: 1920 taxes --> Change in 1940-2000 % Afrodes
stargazer(m5a, m5b, m5c, m6a, m6b, m6c, dep.var.caption = "",
          se=list(cl.se.m5a, cl.se.m5b, cl.se.m5c, cl.se.m6a, cl.se.m6b, cl.se.m6c), 
          title="Racial Demographic Change as a Function of Past State Capacity (1940-2000)", 
          dep.var.labels=c("Abs. \\Delta, 1940-2000)", "Rel. \\Delta, 1940-2000"),
          covariate.labels=c("Fisc. Cap., 1920", "Afro-des. \\%, 1940"),
          align=TRUE, no.space=TRUE, column.sep.width = "-10pt", 
          omit = c("state", "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude", "Constant"),
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F,  notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "No", "Yes", "Yes",  "No", "Yes", "Yes"),
                           c("Geographic controls", "No", "Yes", "Yes", "No", "Yes", "Yes")))

### Table A11: 1920 taxes --> Change in 1940-1980 % Afrodes
stargazer(m3a, m3b, m3c, m4a, m4b, m4c, dep.var.caption = "",
          se=list(cl.se.m3a, cl.se.m3b, cl.se.m3c, cl.se.m4a, cl.se.m4b, cl.se.m4c), 
          title="Racial Demographic Change as a Function of Past State Capacity (1940-1980)", 
          dep.var.labels=c("Abs. \\Delta, 1940-1980)", "Rel. \\Delta, 1940-1980"),
          covariate.labels=c("Fisc. Cap., 1920", "Afro-des. \\%, 1940"),
          align=TRUE, no.space=TRUE, column.sep.width = "-10pt", 
          omit = c("state", "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude", "Constant"),
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F,  notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "No", "Yes", "Yes", "No", "Yes", "Yes"),
                           c("Geographic controls", "No", "Yes", "Yes", "No", "Yes", "Yes")))

### Additional tests

# Table A6: Replicating Table 1 and capacity in 1985-1991

m0t <- lm(afrodes.prop.1991 ~ ln.pc.receita1920.sd, data)
m1t <- lm(afrodes.prop.1991 ~ ln.pc.receita1920.sd +
            state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
m0c <- lm(ln.pc.receita1985.sd ~ ln.pc.receita1920.sd, data)
m1c <- lm(ln.pc.receita1985.sd ~ ln.pc.receita1920.sd +
            state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
m0a <- lm(basic.goods.1991.sd ~ ln.pc.receita1920.sd, data)
m1a <- lm(basic.goods.1991.sd ~ ln.pc.receita1920.sd +
            state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

cl.se.m0t <- sqrt(diag(cluster.vcov(m0t, data$amc1920)))
cl.se.m1t <- sqrt(diag(cluster.vcov(m1t, data$amc1920)))
cl.se.m0c <- sqrt(diag(cluster.vcov(m0c, data$amc1920)))
cl.se.m1c <- sqrt(diag(cluster.vcov(m1c, data$amc1920)))
cl.se.m0a <- sqrt(diag(cluster.vcov(m0a, data$amc1920)))
cl.se.m1a <- sqrt(diag(cluster.vcov(m1a, data$amc1920)))

stargazer(m0c, m1c, m0a, m1a, m0t, m1t, dep.var.caption = "",
          se=list(cl.se.m0c, cl.se.m1c, cl.se.m0a, cl.se.m1a, cl.se.m0t, cl.se.m1t),
          label = "tab:capacity1991",
          title="Present Public Outcomes and Racial Demography as a Function of Past State Capacity (1985-1991)", 
          dep.var.labels=c("Fiscal Capacity, ", "Access to Basic Goods, ", "Afro-descendants \\%, "),
          covariate.labels=c("Fiscal Capacity, 1920"),
          omit = c("state", "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude", "Constant"),
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F, notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "No", "Yes", "No", "Yes", "No", "Yes"),
                           c("Geographic controls", "No", "Yes", "No", "Yes", "No", "Yes")))

# Table A7: Replicating Table 1 and capacity in 2010

m0t <- lm(afrodes.prop.2010 ~ ln.pc.receita1920.sd, data)
m1t <- lm(afrodes.prop.2010 ~ ln.pc.receita1920.sd +
            state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
m0c <- lm(ln.pc.receita2010.sd ~ ln.pc.receita1920.sd, data)
m1c <- lm(ln.pc.receita2010.sd ~ ln.pc.receita1920.sd +
                                 state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
m0a <- lm(pg.2010.sd ~ ln.pc.receita1920.sd, data)
m1a <- lm(pg.2010.sd ~ ln.pc.receita1920.sd +
                       state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

cl.se.m0t <- sqrt(diag(cluster.vcov(m0t, data$amc1920)))
cl.se.m1t <- sqrt(diag(cluster.vcov(m1t, data$amc1920)))
cl.se.m0c <- sqrt(diag(cluster.vcov(m0c, data$amc1920)))
cl.se.m1c <- sqrt(diag(cluster.vcov(m1c, data$amc1920)))
cl.se.m0a <- sqrt(diag(cluster.vcov(m0a, data$amc1920)))
cl.se.m1a <- sqrt(diag(cluster.vcov(m1a, data$amc1920)))

stargazer(m0c, m1c, m0a, m1a, m0t, m1t, dep.var.caption = "",
          se=list(cl.se.m0c, cl.se.m1c, cl.se.m0a, cl.se.m1a, cl.se.m0t, cl.se.m1t),
          label = "tab:capacity2010",
          title="Present Public Outcomes and Racial Demography as a Function of Past State Capacity", 
          dep.var.labels=c("Fiscal Capacity, ", "Public Goods, ", "Afro-descendants \\%, "),
          covariate.labels=c("Fiscal Capacity, 1920"),
          omit = c("state", "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude", "Constant"),
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F, notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "No", "Yes", "No", "Yes", "No", "Yes"),
                           c("Geographic controls", "No", "Yes", "No", "Yes", "No", "Yes")))

##### Table A8: Quilombos, Past State Presence and Share of Afro-descendants

fit3a <- lm(bureaucracy.1872 ~ quil.sd, data)
fit3b <- lm(bureaucracy.1872 ~ quil.sd +
                               state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
fit4a <- lm(afrodes.prop.1872 ~ quil.sd, data)
fit4b <- lm(afrodes.prop.1872 ~ quil.sd +
                               state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

cl.fit3a <- sqrt(diag(cluster.vcov(fit3a, data$amc1920)))
cl.fit3b <- sqrt(diag(cluster.vcov(fit3b, data$amc1920)))
cl.fit4a <- sqrt(diag(cluster.vcov(fit4a, data$amc1920)))
cl.fit4b <- sqrt(diag(cluster.vcov(fit4b, data$amc1920)))

stargazer(fit3a, fit3b, fit4a, fit4b, dep.var.caption = "",
          se=list(cl.fit3a, cl.fit3b, cl.fit4a, cl.fit4b), 
          title="Quilombos, Past State Presence and Share of Afro-descendants (1872)", 
          dep.var.labels=c("Bureaucracy Presence, ", "Afro-descendants \\%, "),
          covariate.labels=c("Quilombo Presence"),
          align=TRUE, no.space=TRUE, column.sep.width = "5pt", 
          omit = c("state", "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude", "Constant"),
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F,  notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "No", "Yes", "No", "Yes"),
                           c("Geographic controls", "No", "Yes", "No", "Yes")))

#################################################

### ALTERNATIVE MEASURES OF STATE CAPACITY ###

# Number of public officials in 1920

M0aBURS <- lm(pg.2000.sd ~ ln.admpub.1920.sd, data)
M1aBURS <- lm(pg.2000.sd ~ ln.admpub.1920.sd + 
                           state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
M0bBURS <- lm(afrodes.prop.2000 ~ ln.admpub.1920.sd, data)
M1bBURS <- lm(afrodes.prop.2000 ~ ln.admpub.1920.sd + 
                           state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
cl.se.M0aBURS <- sqrt(diag(cluster.vcov(M0aBURS, data$amc1920)))
cl.se.M1aBURS <- sqrt(diag(cluster.vcov(M1aBURS, data$amc1920)))
cl.se.M0bBURS <- sqrt(diag(cluster.vcov(M0bBURS, data$amc1920)))
cl.se.M1bBURS <- sqrt(diag(cluster.vcov(M1bBURS, data$amc1920)))

# Railroads in 1920

M0aRAIL <- lm(pg.2000.sd ~ ln.rail1920.sd, data)
M1aRAIL <- lm(pg.2000.sd ~ ln.rail1920.sd + 
                state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
M0bRAIL <- lm(afrodes.prop.2000 ~ ln.rail1920.sd, data)
M1bRAIL <- lm(afrodes.prop.2000 ~ ln.rail1920.sd + 
                state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
cl.se.M0aRAIL <- sqrt(diag(cluster.vcov(M0aRAIL, data$amc1920)))
cl.se.M1aRAIL <- sqrt(diag(cluster.vcov(M1aRAIL, data$amc1920)))
cl.se.M0bRAIL <- sqrt(diag(cluster.vcov(M0bRAIL, data$amc1920)))
cl.se.M1bRAIL <- sqrt(diag(cluster.vcov(M1bRAIL, data$amc1920)))

# Presence of bureaucracy in 1872

M0aBURP <- lm(pg.2000.sd ~ bureaucracy.1872, data)
M1aBURP <- lm(pg.2000.sd ~ bureaucracy.1872 + 
                state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
M0bBURP <- lm(afrodes.prop.2000 ~ bureaucracy.1872, data)
M1bBURP <- lm(afrodes.prop.2000 ~ bureaucracy.1872 + 
                state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
cl.se.M0aBURP <- sqrt(diag(cluster.vcov(M0aBURP, data$amc1920)))
cl.se.M1aBURP <- sqrt(diag(cluster.vcov(M1aBURP, data$amc1920)))
cl.se.M0bBURP <- sqrt(diag(cluster.vcov(M0bBURP, data$amc1920)))
cl.se.M1bBURP <- sqrt(diag(cluster.vcov(M1bBURP, data$amc1920)))

# Geographic Remoteness (as measured recently) 

M0aREMO <- lm(pg.2000.sd ~ ln.proximity.sd, data)
M1aREMO <- lm(pg.2000.sd ~ ln.proximity.sd + 
                state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
M0bREMO <- lm(afrodes.prop.2000 ~ ln.proximity.sd, data)
M1bREMO <- lm(afrodes.prop.2000 ~ ln.proximity.sd + 
                state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)
cl.se.M0aREMO <- sqrt(diag(cluster.vcov(M0aREMO, data$amc1920)))
cl.se.M1aREMO <- sqrt(diag(cluster.vcov(M1aREMO, data$amc1920)))
cl.se.M0bREMO <- sqrt(diag(cluster.vcov(M0bREMO, data$amc1920)))
cl.se.M1bREMO <- sqrt(diag(cluster.vcov(M1bREMO, data$amc1920)))

### Tables

stargazer(M0aBURS, M1aBURS, M0bBURS, M1bBURS, dep.var.caption = "",
          se=list(cl.se.M0aBURS, cl.se.M1aBURS, cl.se.M0bBURS, cl.se.M1bBURS),
          label = "tab:capacityALT",
          title="Present Public Goods Outcomes and Racial Demography as a Function of Past State Capacity (Alternative Measures)", 
          dep.var.labels=c("Public Goods, ", "Afro-descendants \\%, "),
          covariate.labels=c("Public Admin. Officials, 1920"),
          align=TRUE, no.space=TRUE, column.sep.width = "5pt", 
          omit = c("state", "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude", "Constant"),
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F, notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "No", "Yes", "No", "Yes"),
                           c("Geographic controls", "No", "Yes", "No", "Yes")))

stargazer(M0aRAIL, M1aRAIL, M0bRAIL, M1bRAIL, dep.var.caption = "",
          se=list(cl.se.M0aRAIL, cl.se.M1aRAIL, cl.se.M0bRAIL, cl.se.M1bRAIL), 
          title="Present Public Goods Outcomes and Racial Demography as a Function of Past State Capacity (Alternative Measures)", 
          dep.var.labels=c("Public Goods, ", "Afro-descendants \\%, "),
          covariate.labels=c("Railroads, 1920"),
          align=TRUE, no.space=TRUE, column.sep.width = "5pt", 
          omit = c("state", "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude", "Constant"),
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F, notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "No", "Yes", "No", "Yes"),
                           c("Geographic controls", "No", "Yes", "No", "Yes")))

stargazer(M0aREMO, M1aREMO, M0bREMO, M1bREMO, dep.var.caption = "",
          se=list(cl.se.M0aREMO, cl.se.M1aREMO, cl.se.M0bREMO, cl.se.M1bREMO), 
          title="Present Public Goods Outcomes and Racial Demography as a Function of Past State Capacity (Alternative Measures)", 
          dep.var.labels=c("Public Goods, ", "Afro-descendants \\%, "),
          covariate.labels=c("Geographic Remoteness (Reverse Coded)"),
          align=TRUE, no.space=TRUE, column.sep.width = "5pt", 
          omit = c("state", "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude", "Constant"),
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F, notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "No", "Yes", "No", "Yes"),
                           c("Geographic controls", "No", "Yes", "No", "Yes")))

#### Table C1: Past state capacity --> Land Value, Enforcement, Immigrants

m0 <- lm(ln.value.land.acre1920.sd ~ ln.pc.receita1920.sd, data)
m1 <- lm(ln.value.land.acre1920.sd ~ ln.pc.receita1920.sd +
           state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

cl.se.m0 <- sqrt(diag(cluster.vcov(m0, data$amc1920)))
cl.se.m1 <- sqrt(diag(cluster.vcov(m1, data$amc1920)))

m2 <- lm(afrodes.prop.2000 ~ ln.value.land.acre1920.sd, data)
m3 <- lm(afrodes.prop.2000 ~ ln.value.land.acre1920.sd +
           state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

cl.se.m2 <- sqrt(diag(cluster.vcov(m2, data$amc1920)))
cl.se.m3 <- sqrt(diag(cluster.vcov(m3, data$amc1920)))

e0 <- lm(ln.forcapub.1920.sd ~ ln.pc.receita1920.sd, data)
e1 <- lm(ln.forcapub.1920.sd ~ ln.pc.receita1920.sd +
           state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

cl.se.e0 <- sqrt(diag(cluster.vcov(e0, data$amc1920)))
cl.se.e1 <- sqrt(diag(cluster.vcov(e1, data$amc1920)))

e2 <- lm(afrodes.prop.2000 ~ ln.forcapub.1920.sd, data)
e3 <- lm(afrodes.prop.2000 ~ ln.forcapub.1920.sd +
           state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

cl.se.e2 <- sqrt(diag(cluster.vcov(e2, data$amc1920)))
cl.se.e3 <- sqrt(diag(cluster.vcov(e3, data$amc1920)))

i0 <- lm(ln.pc.immigrants1920.sd ~ ln.pc.receita1920.sd, data)
i1 <- lm(ln.pc.immigrants1920.sd ~ ln.pc.receita1920.sd +
           state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

cl.se.i0 <- sqrt(diag(cluster.vcov(i0, data$amc1920)))
cl.se.i1 <- sqrt(diag(cluster.vcov(i1, data$amc1920)))

i2 <- lm(afrodes.prop.2000 ~ ln.pc.immigrants1920.sd, data)
i3 <- lm(afrodes.prop.2000 ~ ln.pc.immigrants1920.sd +
           state + area.log + altitude + rain + sun + ln.dist.coast + ln.dist.capital + longitude + latitude, data)

cl.se.i2 <- sqrt(diag(cluster.vcov(i2, data$amc1920)))
cl.se.i3 <- sqrt(diag(cluster.vcov(i3, data$amc1920)))

stargazer(m0, m1, e0, e1, i0, i1, dep.var.caption = "", 
          se=list(cl.se.m0, cl.se.m1, cl.se.e0, cl.se.e1, cl.se.i0, cl.se.i1), 
          title="Past State Capacity and Average Land Value (per acre), Size of Law Enforcement Apparatus, and Share of Immigrants", 
          dep.var.labels=c("Land Value,", "Law Enforcement", "Immigrants (%)"),
          covariate.labels=c("Fiscal Capacity, 1920"),
          omit = c("state", "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude", "Constant"),
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F, notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "No", "Yes", "No", "Yes", "No", "Yes"), c("Geographic controls", "No", "Yes", "No", "Yes", "No", "Yes")))

### Table C2: Land Value, Enforcement, Immigrants --> Current % Afrodes

stargazer(m2, m3, e2, e3, i2, i3,
          dep.var.caption = "",
          se=list(cl.se.m2, cl.se.m3, cl.se.e2, cl.se.e3, cl.se.i2, cl.se.i3), 
          title="Past Land Value, Size of Law Enforcement Apparatus, Immigrants and Contemporary Share of Afro-descendants", 
          dep.var.labels=c("Afro-descendants \\%, "),
          covariate.labels=c("Land Value, 1920", "Law Enforcement Officials, 1920", "immigrants %, 1920"),
          omit = c("state", "area.log", "altitude", "rain", "sun", "ln.dist.coast", "ln.dist.capital", "longitude", "latitude", "Constant"),
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F, notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "No", "Yes"),
                           c("Geographic controls", "No", "Yes")))



################################################################

# Table E1

# PESB survey 

library(haven)
pesb <- read.csv("pesb.csv", stringsAsFactors = FALSE, header = T)

# Types of mismatch
pesb$mismatch.black.bias <- ifelse((pesb$color.self == 1 | pesb$color.self == 2) & pesb$color.attrib == 3, 1, 0)
pesb$mismatch.white.bias <- ifelse((pesb$color.attrib == 1 | pesb$color.attrib == 2) & pesb$color.self == 3, 1, 0)

pesb0 <- lm(mismatch.black.bias ~ ln.pc.receita1920.sd, data = pesb)
pesb1 <- lm(mismatch.black.bias ~ ln.pc.receita1920.sd + as.factor(uf), data = pesb)
pesb2 <- lm(mismatch.white.bias ~ ln.pc.receita1920.sd, data = pesb)
pesb3 <- lm(mismatch.white.bias ~ ln.pc.receita1920.sd + as.factor(uf), data = pesb)

# cluster-robust standard errors
cl.se.pesb0 <- sqrt(diag(cluster.vcov(pesb0, pesb$municipi)))
cl.se.pesb1 <- sqrt(diag(cluster.vcov(pesb1, pesb$municipi)))
cl.se.pesb2 <- sqrt(diag(cluster.vcov(pesb2, pesb$municipi)))
cl.se.pesb3 <- sqrt(diag(cluster.vcov(pesb3, pesb$municipi)))

stargazer(pesb0, pesb1, pesb2, pesb3,
          dep.var.caption = "",
          keep = "ln.pc.receita1920.sd",
          covariate.labels=c("Fiscal Capacity, 1920"),
          se=list(cl.se.pesb0, cl.se.pesb1, cl.se.pesb2, cl.se.pesb3), 
          title="Mismatches in Self- and Interviewer Racial Classification and Local State Capacity", 
          dep.var.labels=c("Mismatch Black Bias", "Mismatch White Bias"),
          omit.stat=c("rsq", "ser", "f"), notes = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "No", "Yes", "No", "Yes")))

# Table A3

# LatinoBarometro 2005 survey

lb2005 <- read.csv("latinobar.csv", stringsAsFactors = FALSE, header = T)

lb0 <- lm(att.tax ~ ln.pc.receita1920.sd 
          + age + educ + econ + tamciud + sun + rain
          , data = lb2005)
cl.se.lb0 <-  sqrt(diag(cluster.vcov(lb0, lb2005$ciu)))

lb1 <- lm(att.tax ~ ln.pc.receita1920.sd 
          + age + educ + econ + tamciud + sun + rain
          + as.factor(state), data = lb2005)
cl.se.lb1 <-  sqrt(diag(cluster.vcov(lb1, lb2005$ciu)))

lb2 <- lm(conf.local  ~ ln.pc.receita1920.sd 
          + age + educ + econ + tamciud + sun + rain
          , data = lb2005)
cl.se.lb2 <-  sqrt(diag(cluster.vcov(lb2, lb2005$ciu)))

lb3 <- lm(conf.local ~ ln.pc.receita1920.sd
          + age + educ + econ + tamciud + sun + rain
          + as.factor(state), data = lb2005)
cl.se.lb3 <-  sqrt(diag(cluster.vcov(lb3, lb2005$ciu)))

stargazer(lb0, lb1, lb2, lb3,
          se=list(cl.se.lb0, cl.se.lb1, cl.se.lb2, cl.se.lb3), 
          title="Past Fiscal Capacity, Contemporary Attitudes Toward Taxation and Reported Confidence in the Local Governemnt", 
          keep = "ln.pc.receita1920.sd",
          omit.stat=c("rsq", "ser", "f"), notes = "", notes.append = F, notes.label = "",
          star.char = c("+", "*", "**", "***"), star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          add.lines = list(c("State FE", "No", "Yes", "No", "Yes"),
                           c("Additional controls", "Yes", "Yes", "Yes", "Yes")))



####################################
# Conley Std Errors for Tables 1-3

library(dplyr)
library(lfe)
library(Matrix)
library(sandwich)
library(HistData)
library(robustbase)
library(dummies)

# Functions written by: Fiona Burlig (https://www.fionaburlig.com/)
as.tbl_df <- function(data) { 
  dataset <- as.data.frame(data) %>%
    tbl_df()
}

tbldfGrabber <- function(data, varnames) { 
  matrixObject <- data %>%
    select_(.dots = varnames) %>%
    as.matrix() 
}

miniOLS <- function(data, y, X) {
  n <- nrow(data)
  k <- length(X)
  ydata <- tbldfGrabber(data, y)
  xdata <- tbldfGrabber(data, X)
  betahat <- solve(t(xdata) %*% xdata) %*% t(xdata) %*% ydata 
  return(betahat)
}

miniResids <- function(data, y, X) {
  n <- nrow(data)
  k <- length(X)
  ydata <- tbldfGrabber(data, y)
  xdata <- tbldfGrabber(data, X)
  betahat <- solve(t(xdata) %*% xdata) %*% t(xdata) %*% ydata 
  e <- ydata - xdata %*% betahat
  output <- list(xdata, e)
  return(output) 
}


olsConley <- function(data, y, X, lat, lon, cutoff) { 
  n <- nrow(data)
  k <- length(X)
  ydata <- tbldfGrabber(data, y)
  xdata <- tbldfGrabber(data, X)
  betahat <- solve(t(xdata) %*% xdata) %*% t(xdata) %*% ydata
  e <- ydata - xdata %*% betahat
  latdata <- tbldfGrabber(data, lat)
  londata <- tbldfGrabber(data, lon)
  mtWeight <- lapply(1:n, function(i) {
    lonscale <- cos(latdata[i]*pi / 180) * 111
    latscale <- 111
    dist <- as.numeric(sqrt((latscale*(latdata[i] - latdata))^2
                            + (lonscale*(londata[i] - londata))^2))
    window <- as.numeric(dist <= cutoff)
    XeeXh <- ((t(t(xdata[i, ])) %*% matrix(1, 1, n) * e[i,]) * 
                (matrix(1, k, 1) %*% (t(e) * t(window)))) %*% xdata 
    return(XeeXh)
  })
  
  mt <- (Reduce("+", mtWeight)) / n
  brd <- solve(t(xdata) %*% xdata)
  sandwich <- n* (t(brd) %*% mt %*% brd) 
  se <- sqrt(diag(sandwich))
  output <- list(betahat, se)
  names(output) <- c("betahat", "conleySE")
  return(output)
}


data$state.f <- as.factor(data$state)
data1 <- dummy.data.frame(data, names = "state.f" , sep = ".")

quilData <- as.tbl_df(data1) %>%
  mutate(ones = 1)

quilData <- quilData[,c("afrodes.prop.2000", "quil.sd", "pg.2000.sd", "ones", "ln.pc.receita1920.sd", "area.log", "altitude", 
                          "rain", "sun", "ln.dist.coast",
                          "ln.dist.capital", "latitude", "longitude",
                          "state.f.AC", "state.f.AL", "state.f.AM", "state.f.AP", "state.f.BA", "state.f.CE",
                          "state.f.ES", "state.f.GO", "state.f.MA", "state.f.MG", "state.f.MS", "state.f.MT",
                          "state.f.PA", "state.f.PB", "state.f.PE", "state.f.PI", "state.f.PR", "state.f.RJ",
                          "state.f.RN", "state.f.RO", "state.f.RR", "state.f.RS", "state.f.SC", "state.f.SE",
                          "state.f.SP", "state.f.TO")]
quilData <- na.omit(quilData) 

# conley SEs Table 1 fiscal capacity 1920 -> pg 2000
pg.Conley <- olsConley(quilData, "pg.2000.sd", 
                       c("ones", "ln.pc.receita1920.sd", "area.log", "altitude", 
                         "rain", "sun", "ln.dist.coast", "ln.dist.capital",
                         "state.f.AC", "state.f.AL", "state.f.AM", "state.f.AP", "state.f.BA", "state.f.CE",
                         "state.f.ES", "state.f.GO", "state.f.MA", "state.f.MG", "state.f.MS", "state.f.MT",
                         "state.f.PA", "state.f.PB", "state.f.PE", "state.f.PI", "state.f.PR", "state.f.RJ",
                         "state.f.RN", "state.f.RO", "state.f.RR", "state.f.RS", "state.f.SC", "state.f.SE",
                         "state.f.TO"), "latitude", "longitude", 200)
pg.Conley

# conley SEs Table 1 fiscal capacity 1920 -> afrodes 2000
afrodes.Conley <- olsConley(quilData, "afrodes.prop.2000", 
                            c("ones", "ln.pc.receita1920.sd", "area.log", "altitude", 
                              "rain", "sun", "ln.dist.coast", "ln.dist.capital",
                              "state.f.AC", "state.f.AL", "state.f.AM", "state.f.AP", "state.f.BA", "state.f.CE",
                              "state.f.ES", "state.f.GO", "state.f.MA", "state.f.MG", "state.f.MS", "state.f.MT",
                              "state.f.PA", "state.f.PB", "state.f.PE", "state.f.PI", "state.f.PR", "state.f.RJ",
                              "state.f.RN", "state.f.RO", "state.f.RR", "state.f.RS", "state.f.SC", "state.f.SE",
                              "state.f.TO"), "latitude", "longitude", 200)
afrodes.Conley


# conley SEs Table 2 quilombos -> fiscal capacity 1920
quil.tx.Conley <- olsConley(quilData, "ln.pc.receita1920.sd", 
                            c("ones", "quil.sd", "area.log", "altitude", 
                              "rain", "sun", "ln.dist.coast", "ln.dist.capital",
                              "state.f.AC", "state.f.AL", "state.f.AM", "state.f.AP", "state.f.BA", "state.f.CE",
                              "state.f.ES", "state.f.GO", "state.f.MA", "state.f.MG", "state.f.MS", "state.f.MT",
                              "state.f.PA", "state.f.PB", "state.f.PE", "state.f.PI", "state.f.PR", "state.f.RJ",
                              "state.f.RN", "state.f.RO", "state.f.RR", "state.f.RS", "state.f.SC", "state.f.SE",
                              "state.f.TO"), "latitude", "longitude", 200)
quil.tx.Conley


# conley SEs Table 3 quilombos -> afrodes 2000
quil.afro.Conley <- olsConley(quilData, "afrodes.prop.2000", 
                              c("ones", "quil.sd", "area.log", "altitude", 
                                "rain", "sun", "ln.dist.coast", "ln.dist.capital",
                                "state.f.AC", "state.f.AL", "state.f.AM", "state.f.AP", "state.f.BA", "state.f.CE",
                                "state.f.ES", "state.f.GO", "state.f.MA", "state.f.MG", "state.f.MS", "state.f.MT",
                                "state.f.PA", "state.f.PB", "state.f.PE", "state.f.PI", "state.f.PR", "state.f.RJ",
                                "state.f.RN", "state.f.RO", "state.f.RR", "state.f.RS", "state.f.SC", "state.f.SE",
                                "state.f.TO"), "latitude", "longitude", 200)
quil.afro.Conley


####################################

# Maps (Figure 4)
library(ggplot2)
library(ggthemes)
library(rgdal)


# country border
brazil <- readOGR(dsn = "shapefile", layer = "BRA_adm0")
brazil <- spTransform(brazil, CRS("+proj=poly +lat_0=0 +lon_0=-54 +x_0=0 +y_0=0 +ellps=GRS80 +units=m +no_defs"))
brazil_f <- fortify(brazil)

# 2000 borders
brazil2000 <- readOGR(dsn = "shapefile", layer = "brazil2000")
brazil2000_f <- fortify(brazil2000)
brazil2000$id <- row.names(brazil2000)
brazil2000_f <- left_join(brazil2000_f, brazil2000@data) 

# Quilombos location
quil <- brazil2000[which(brazil2000$quil_nm > 0), ]
quil <- coordinates(spTransform(quil, CRS("+proj=poly +lat_0=0 +lon_0=-54 +x_0=0 +y_0=0 +ellps=GRS80 +units=m +no_defs")))

quil <- as.data.frame(quil)
names(quil)[1] <- "long"
names(quil)[2] <- "lat"


# Map fiscal capacity 1920 and quilombos 
ggplot(brazil2000_f, aes(long, lat, group = group)) +
  geom_polygon(aes(fill=`l__1920`))  +
  coord_equal() +
  scale_fill_gradient(low = "white", high = "#006699", 
                      name = "Tax revenues per\ncapita (log), 1920") + 
  theme_map() +
  geom_path(data = brazil_f, colour="gray70", lwd=0.1) +
  geom_point(data = quil, aes(x = long, y = lat, group = NULL), size = 0.2, col="black")


# Map afrodes 2000 and quilombos 
ggplot(brazil2000_f, aes(long, lat, group = group)) +
  geom_polygon(color = "transparent", aes(fill=`a__2000`))  + 
  coord_equal() +
  scale_fill_gradient(low = "white", high = "#CB454A", 
                      name = "Afro-descendant share, 2000") + 
  theme_map() +
  geom_path(data = brazil_f, colour="gray90", lwd=0.05) +
  geom_point(data = quil, aes(x = long, y = lat, group = NULL), size = 0.2, col="black")

